Advanced Data Journalism Final Project

Data from: https://ephtracking.cdc.gov/DataExplorer/?c=35&i=88&m=-1

https://ephtracking.cdc.gov/DataExplorer/?c=35&i=88&m=-1

https://public.tableau.com/app/profile/samantha2462/viz/shared/437ZHKXJZ

https://moboscoc.org/resources/data/point-in-time-count-reports/

Step 1: Load libraries

library(plotly)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout

Step 2: Load data

We will be using three data sets: one contains the coordinates and details for cooling centers serving Missouri, another contains heat-related hospitalizations over the last several years, and the third contains HUD data for Missouri. I also loaded population data from the Census.

coolingcenters <- read_csv("Final Project/Missouri_Cooling_Centers_Sites.csv")

heathospitalizations <- read_csv("Final Project/heatrate.csv")

hud_mo <- read_csv("Final Project/HUDmo.csv")

Step 3: Clean data

Clean & organize cooling centers data.

Our data needs to be properly — and specifically — formatted so it can be easily visualized.

#First, I'm separating the latitude and longitude into separate boxes. 
coolingcentersclean <- coolingcenters %>% 
  separate(Location, sep=", ", into=c("latitude", "longitude"))

#I'm also cleaning up these boxes by removing the parentheses from them.
coolingcentersclean <- coolingcentersclean %>%
 mutate(lat = str_replace(latitude, "[(]", ""))

coolingcentersclean <- coolingcentersclean %>%
 mutate(long = str_replace(longitude, "[)]", ""))

#I'm going to change the state abbreviations to their names so it will be easier to combine with the states data.
coolingcentersclean <- coolingcentersclean %>% mutate(region = case_when(
  grepl("MO", state, ignore.case=T) ~ "missouri",
  grepl("IL", state, ignore.case=T) ~ "illinois",
  grepl("KS", state, ignore.case=T) ~ "kansas"))

#I'm going to remove the improperly formatting latitude and longitude from the dataset. 
coolingcentersclean <- subset (coolingcentersclean, select = -latitude)
coolingcentersclean <- subset (coolingcentersclean, select = -longitude)

#We'll need our coordinates to be numbers so we can plot them.
coolingcentersclean$lat <- as.numeric(coolingcentersclean$lat)
coolingcentersclean$long <- as.numeric(coolingcentersclean$long)

#Lastly, I'll be cleaning the names with janitor. 
coolingcentersclean <- clean_names(coolingcentersclean)

Clean & organize heat data

#This dataset is relatively clean, so I'm just going to clean the names and then remove the column at the end.
heathospitalizations <- clean_names(heathospitalizations)
heathosptializations <- tolower(heathospitalizations$state)

heathospitalizations <- subset (heathospitalizations, select = -x6)

Clean & organize county pop data

county_pop  <- clean_names(county_pop)
county_pop <- county_pop %>% separate(name, sep=",", into=c("county", "state"))
county_pop <- county_pop %>%  mutate(county = str_replace(county, " County", ""))

Clean & organize projected heat data

projectedheat <- clean_names(projectedheat)

Clean & organize HUD data

Step 4: Load map data

states <- map_data("state") %>% filter(region=="missouri" | region == "illinois" | region == "kansas")

statescounties <- map_data("county") %>% filter(region ==  "missouri" | region == "illinois" | region == "kansas")

mocounties <- map_data("county") %>% filter(region ==  "missouri")

countiesall <- map_data("county")

Step 5: Create custom interactive dot map

By using county-level data from the maps package and plotly, we can create our own custom interactive dot map.

staticdotmap_ <- ggplot(statescounties, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "#F8F9F9", color = "darkgray", linewidth=.1) +
  geom_point(data=coolingcentersclean, aes(group = NULL, text = paste("Location:", facility)),   alpha=.5, size=.7, color="#EE9253") +   
  coord_fixed(1.3) +
  theme_void() +
  labs(title="Cooling Centers in Missouri", caption = "Data from data.mo.gov") + theme_void()

staticdotmap <- ggplotly(staticdotmap_, tooltip = "text")

Step 6: Create interactive dot map with leaflet

Leaflet is a simpler way to plot data interactively.

interactivedotmap <- leaflet() %>% 
    addProviderTiles(providers$CartoDB.Positron) %>%
    addCircleMarkers(
    data = coolingcentersclean,
    color = "#EE9253",
    opacity = 0.5,
    radius = 1,
    ~long, ~lat, popup = ~htmlEscape(facility))

Step 8: Create an interactive bar graph

Similar to how we created a custom interactive dot map, we can do the same for bar graphs.

#I'm going to create a tooltip first to make the bar graph interactive. 
heathospitalizations <- heathospitalizations %>% mutate(
    tooltip_text = paste0(year, " - ", value, "%"))

#Now, I'm going to use ggplot to create the bar graph.
bar_graph_ <- heathospitalizations %>% 
  filter(state=="Missouri") %>% 
  ggplot(aes(x=year, y=value, tooltip = tooltip_text, data_id = state)) + 
  geom_col_interactive(width=.7, fill="#EE9253", size = 0.2) + 
  theme_minimal() +
  theme(axis.text=element_text(size = 7), axis.title=element_text(size=7),        title=element_text(size=7)) +
   labs(title = "Percentage of heat-related illness hospitalizations in Missouri per 100k people",subtitle = "Data from CDC") +
   ylab("Percentage of heat hospitalizations") +
   xlab("Year") +
  geom_hline(yintercept = 2.010133,color="red", linewidth=.5)
  
#Girafe will let us make the graph interactive.
bar_graph <- girafe(ggobj = bar_graph_, width_svg = 5, height_svg = 3)
chloro_map <- girafe(ggobj = chloro_map, width_svg = 5, height_svg = 3)
Error:
! `ggobj` must be a ggplot2 plot
Backtrace:
 1. ggiraph::girafe(ggobj = chloro_map, width_svg = 5, height_svg = 3)

Step 9: Some basic analysis

coolingcentersclean %>% 
  filter(transportation == "Yes")
#No centers provide transportation

coolingcentersclean %>% 
  filter(ada_accessible == "No")
#All centers are ADA accessible

coolingcentersclean %>% 
  group_by(county) %>% 
  count(county) %>% 
  arrange(desc(n))
#Jackson County has the most cooling centers followed by St. Louis county and then Madison

coolingcentersclean %>% 
  filter(county == "Boone")
#Boone County has 6 cooling centers

coolingcentersclean %>% 
  filter(grepl("sun", hours_of_operation, ignore.case=T))
#58 centers are open on Sundays

heathospitalizations %>% 
  arrange(desc(value))
#California had the highest heat-related hospitalization rate in 2020; Missouri had the sixth highest rate in 2011 at 7.6.

heathospitalizations %>% 
  filter(state=="Missouri") %>% 
  arrange(desc(value))
#Missouri had its highest number of heat-related hospitalizations in 2011

heathospitalizations %>% 
  summarise(average = mean(value))
#The average percentage of heat-related illness hospitalizations from 2000 to 2021 using availble data was 2.010133%.

heathospitalizations %>% 
  group_by(state) %>% 
  summarise(average = mean(value)) %>% 
  arrange(desc(average))
#Missouri has the second highest rate of the data reported. 

projectedheat %>% 
  group_by(state) %>%
  summarise(average=mean(value)) %>% 
  arrange(desc(average))
#Missouri ranks tenth in "projected difference in extreme heat days as compared to the historical period" with an average across the state of about 24 days. 

Story Package

Important statistics and data analysis to include in the story:

In the last two decades, Missouri ranks in the top ten out of reporting states for the highest rate of heat-related illnesses per 100,000 people. In 2011, the rate in Missouri reached 7.6%, ranking it number 6 behind Arizona.

Missouri currently reports 538 cooling centers. 58 of those cooling centers are open on Sundays.

Graphs to include in the story:

animatedgraph

bar_graph + 
  transition_states(year,
  transition_length = 2,
  state_length = 1) +
  ease_aes('sine-in-out')
---
title: "R Notebook"
output: html_notebook
---

# **Advanced Data Journalism Final Project**

Data from: <https://ephtracking.cdc.gov/DataExplorer/?c=35&i=88&m=-1>

<https://ephtracking.cdc.gov/DataExplorer/?c=35&i=88&m=-1>

<https://public.tableau.com/app/profile/samantha2462/viz/shared/437ZHKXJZ>

<https://moboscoc.org/resources/data/point-in-time-count-reports/>


### Step 1: Load libraries

```{r}
library(tidyverse)
library(lubridate)
library(ggplot2)
library(dplyr)
library(janitor)
library(maps)
library(sf)
library(leaflet)
library(spData)
library(tidycensus)
library(ggiraph)
library(plotly)

#Animation Libraries
library(gganimate)
library(sp)
library(viridis)
library(htmltools)
library(gapminder)
library(gifski)
library(png)
library(tmap)


```

### Step 2: Load data

We will be using three data sets: one contains the coordinates and details for cooling centers serving Missouri, another contains heat-related hospitalizations over the last several years, and the third contains HUD data for Missouri. I also loaded population data from the Census.

```{r}
# Read the three CSV inputs from the "Final Project" folder (paths are
# relative to the working directory).

# Cooling center sites; cleaned in Step 3.
coolingcenters <- "Final Project/Missouri_Cooling_Centers_Sites.csv" %>%
  read_csv()

# Heat-related hospitalization figures used for the bar graph.
heathospitalizations <- "Final Project/heatrate.csv" %>%
  read_csv()

# HUD data for Missouri; used for the county choropleth.
hud_mo <- "Final Project/HUDmo.csv" %>%
  read_csv()
```

### Step 3: Clean data

Clean & organize cooling centers data.

Our data needs to be properly --- and specifically --- formatted so it can be easily visualized.

```{r}
# Build `coolingcentersclean` from the raw cooling-center table.
# `Location` holds both coordinates in one string of the form "(lat, long)",
# so it is split on ", " and the leftover parentheses are stripped before the
# values are converted to numbers.
coolingcentersclean <- coolingcenters %>%
  separate(Location, sep = ", ", into = c("latitude", "longitude")) %>%
  mutate(
    # Remove the opening / closing parenthesis, then convert to numeric so the
    # coordinates can be plotted.
    lat = as.numeric(str_replace(latitude, "[(]", "")),
    long = as.numeric(str_replace(longitude, "[)]", "")),
    # Spell out the state abbreviation in lower case so it lines up with the
    # `region` values used by map_data(). Rows matching none of the three
    # patterns get NA.
    region = case_when(
      grepl("MO", state, ignore.case = TRUE) ~ "missouri",
      grepl("IL", state, ignore.case = TRUE) ~ "illinois",
      grepl("KS", state, ignore.case = TRUE) ~ "kansas"
    )
  ) %>%
  # The raw text halves are no longer needed once lat/long exist.
  select(-latitude, -longitude) %>%
  # Lastly, standardise the column names with janitor.
  clean_names()
```

Clean & organize heat data

```{r}
# This dataset is relatively clean: standardise the column names and drop the
# column at the end, which is named `x6` after cleaning (skipped if absent).
# The state names are deliberately left in their original case because later
# chunks filter on state == "Missouri".
heathospitalizations <- heathospitalizations %>%
  clean_names() %>%
  select(-any_of("x6"))
```

Clean & organize county pop data

```{r}
# NOTE(review): `county_pop` is not created in any chunk visible in this
# notebook; it has to exist in the session before this chunk runs.
county_pop <- county_pop %>%
  clean_names() %>%
  # Split "<county>,<state>" on the comma plus any whitespace after it, so the
  # state value does not keep a leading space.
  separate(name, sep = ",\\s*", into = c("county", "state")) %>%
  # Drop the " County" suffix from the county name.
  mutate(county = str_replace(county, " County", ""))
```

Clean & organize projected heat data

```{r}
# Standardise the column names with janitor.
# NOTE(review): `projectedheat` is not loaded in any chunk visible here;
# confirm where it comes from.
projectedheat <- projectedheat %>% clean_names()
```

Clean & organize HUD data
```{r}
# Standardise the column names, then give the measure column the name that the
# choropleth uses for its fill.
hud_mo <- hud_mo %>%
  clean_names() %>%
  rename(unsheltered_total = measure_values)
```

### Step 4: Load map data

```{r}
# State outlines for the three states that appear in the cooling-center data.
states <- map_data("state") %>%
  filter(region %in% c("missouri", "illinois", "kansas"))

# County outlines for the whole country; fetched once and reused below.
countiesall <- map_data("county")

# County outlines for the same three states. Without this filter the object is
# identical to `countiesall` and the dot map would draw every US county.
statescounties <- countiesall %>%
  filter(region %in% c("missouri", "illinois", "kansas"))

# Missouri counties only; used for the choropleth.
mocounties <- countiesall %>%
  filter(region == "missouri")
```

### Step 5: Create custom interactive dot map
By using county-level data from the maps package and plotly, we can create our own custom interactive dot map.

```{r}
# Base layer: county outlines from `statescounties`.
# Point layer: one point per row of `coolingcentersclean`, carrying a `text`
# aesthetic that ggplotly() picks up as the hover label.
staticdotmap_ <- ggplot(statescounties) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "#F8F9F9", color = "darkgray", linewidth = .1
  ) +
  geom_point(
    data = coolingcentersclean,
    aes(x = long, y = lat, text = paste("Location:", facility)),
    alpha = .5, size = .7, color = "#EE9253"
  ) +
  coord_fixed(1.3) +
  labs(title = "Cooling Centers in Missouri", caption = "Data from data.mo.gov") +
  theme_void()

# Convert to plotly, showing only the `text` aesthetic on hover.
staticdotmap <- staticdotmap_ %>% ggplotly(tooltip = "text")
```

### Step 6: Create interactive dot map with leaflet
Leaflet is a simpler way to plot data interactively. 

```{r}
# CartoDB Positron basemap with one small circle marker per row of
# `coolingcentersclean`; the popup shows the HTML-escaped facility name.
interactivedotmap <- leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    popup = ~htmlEscape(facility),
    data = coolingcentersclean,
    radius = 1,
    color = "#EE9253",
    opacity = 0.5
  )
```

### Step 8: Create an interactive bar graph
Similar to how we created a custom interactive dot map, we can do the same for bar graphs.

```{r}
# Tooltip text for each row ("<year> - <value>%"), shown when hovering a bar.
# NOTE(review): the labels below call the values percentages while the title
# also says "per 100k people"; confirm the unit against the source data.
heathospitalizations <- heathospitalizations %>%
  mutate(tooltip_text = paste0(year, " - ", value, "%"))

# Bar chart of Missouri's yearly values, built with ggiraph's interactive geom.
bar_graph_ <- heathospitalizations %>%
  filter(state == "Missouri") %>%
  ggplot(aes(x = year, y = value, tooltip = tooltip_text, data_id = state)) +
  geom_col_interactive(width = .7, fill = "#EE9253", linewidth = 0.2) +
  # Reference line: mean of `value` over every row of the full dataset (all
  # states and years), computed from the data instead of being hard-coded.
  geom_hline(
    yintercept = mean(heathospitalizations$value, na.rm = TRUE),
    color = "red", linewidth = .5
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 7),
    axis.title = element_text(size = 7),
    title = element_text(size = 7)
  ) +
  labs(
    title = "Percentage of heat-related illness hospitalizations in Missouri per 100k people",
    subtitle = "Data from CDC",
    x = "Year",
    y = "Percentage of heat hospitalizations"
  )

# girafe() turns the ggplot into an interactive widget.
bar_graph <- girafe(ggobj = bar_graph_, width_svg = 5, height_svg = 3)

```


```{r}
# Attach the HUD figures to the Missouri county polygons.
# County names are lower-cased and stripped of periods before joining, because
# the `subregion` names from map_data("county") are lower case without
# punctuation (e.g. "st louis"); otherwise such counties would not match.
hud_states <-
  hud_mo %>%
  mutate(county = str_remove_all(str_to_lower(county), "\\.")) %>%
  right_join(mocounties, by = c("county" = "subregion"))

# County choropleth filled by `unsheltered_total`; counties with no HUD row
# keep the default NA fill.
chloro_map <- ggplot(
  hud_states,
  aes(x = long, y = lat, group = group, fill = unsheltered_total)
) +
  theme_minimal() +
  geom_polygon(colour = "black", linewidth = .1) +
  coord_fixed(1.3) +
  # Spread the three colours over the observed data range. A diverging
  # gradient2 scale is centred on 0 by default, so its `low` colour would never
  # be used for non-negative values.
  scale_fill_gradientn(colours = c("#FEF9E7", "#F4D03F", "#7D6608")) +
  labs(
    title = "Unsheltered total by Missouri county",
    subtitle = "Data from HUD"
  ) +
  theme(panel.background = element_rect(fill = "#F8F9F9", color = "#F8F9F9"))
```

### Step 9: Some basic analysis

```{r}
# Exploratory queries; each result is printed, and the comment after each query
# records the finding noted when the notebook was written.

# Centers that provide transportation.
coolingcentersclean %>%
  filter(transportation == "Yes")
# No centers provide transportation

# Centers that are not ADA accessible.
coolingcentersclean %>%
  filter(ada_accessible == "No")
# All centers are ADA accessible

# Number of centers per county, largest first.
coolingcentersclean %>%
  count(county, sort = TRUE)
# Jackson County has the most cooling centers followed by St. Louis county and then Madison

# Centers in Boone County.
coolingcentersclean %>%
  filter(county == "Boone")
# Boone County has 6 cooling centers

# Centers whose hours of operation mention "sun" (case-insensitive).
coolingcentersclean %>%
  filter(grepl("sun", hours_of_operation, ignore.case = TRUE))
# 58 centers are open on Sundays

# All state/year rows, highest value first.
heathospitalizations %>%
  arrange(desc(value))
# California had the highest heat-related hospitalization rate in 2020; Missouri had the sixth highest rate in 2011 at 7.6.

# Missouri rows only, highest value first.
heathospitalizations %>%
  filter(state == "Missouri") %>%
  arrange(desc(value))
# Missouri had its highest number of heat-related hospitalizations in 2011

# Mean of `value` across every row.
heathospitalizations %>%
  summarise(average = mean(value))
# The average percentage of heat-related illness hospitalizations from 2000 to 2021 using available data was 2.010133%.

# Mean of `value` per state, highest first.
heathospitalizations %>%
  group_by(state) %>%
  summarise(average = mean(value)) %>%
  arrange(desc(average))
# Missouri has the second highest rate of the data reported.

# Mean projected value per state, highest first.
# NOTE(review): `projectedheat` is not loaded in any chunk visible here.
projectedheat %>%
  group_by(state) %>%
  summarise(average = mean(value)) %>%
  arrange(desc(average))
# Missouri ranks tenth in "projected difference in extreme heat days as compared to the historical period" with an average across the state of about 24 days.

```

### Story Package

**Important statistics and data analysis to include in the story:**

In the last two decades, Missouri ranks in the top ten out of reporting states for the highest rate of heat-related illnesses per 100,000 people. In 2011, the rate in Missouri reached 7.6%, ranking it number 6 behind Arizona.

Missouri currently reports 538 cooling centers. 58 of those cooling centers are open on Sundays. 

**Graphs to include in the story:**

```{r}
# Plotly version of the cooling-center dot map (built in Step 5).
staticdotmap
```
```{r}
# Leaflet version of the cooling-center dot map (built in Step 6).
interactivedotmap
```
```{r}
# Interactive (girafe) bar graph of the Missouri hospitalization values.
bar_graph
```
```{r}
# Static ggplot choropleth of `unsheltered_total` by Missouri county.
chloro_map
```

```{r}
# Animate the bar chart, stepping through the years.
# The animation is built from `bar_graph_` (the ggplot object); `bar_graph` is
# the girafe widget and cannot take extra ggplot layers with `+`.
# NOTE(review): the reference-line layer has no `year` column; confirm it is
# rendered as a static layer in the animation.
animatedgraph <- bar_graph_ +
  transition_states(
    year,
    transition_length = 2,
    state_length = 1
  ) +
  ease_aes("sine-in-out")

animatedgraph
```
